Let \(X_t\) be the true concentration of chlorophyll-a on day \(t\) and \(Y_t\) be the measured value on day \(t\).

These are all our covariates: dissolved oxygen, pH, and turbidity (internal, in-lake drivers), and temperature, longwave and shortwave radiation, and precipitation (external, meteorological drivers).

Set up

# rjags: interface to the JAGS sampler (attaches coda for MCMC diagnostics)
library(rjags)
## Loading required package: coda
## Linked to JAGS 4.3.2
## Loaded modules: basemod,bugs
# ecoforecastR: provides fit_dlm, ciEnvelope, and col.alpha used throughout
library(ecoforecastR)

# Download the raw data products (attaches the tidyverse as a side effect)
source("01_download_data.R")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `summarise()` has grouped output by 'datetime', 'site_id'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'datetime', 'site_id'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'datetime', 'site_id'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'date'. You can override using the `.groups` argument.
# Merge the downloaded products into `cleaned_data`, used by every model below
source("02_combine_data.R")

Null Model(s)

Random Walk Model

Data model: \[ Y_t \sim N(X_t, \tau_\text{obs}) \]

Process model: \[ X_{t+1} \sim N(X_t, \tau_\text{add}) \]

Priors: \[ X_1 \sim N(X_{ic}, \tau_{ic}) \] \[ \tau_\text{obs} \sim \text{Gamma}(a_\text{obs}, r_\text{obs}) \] \[ \tau_\text{add} \sim \text{Gamma}(a_\text{add}, r_\text{add}) \]

Fitting the Model

# Fit the random-walk state-space model in JAGS.
# NOTE(review): this script is assumed to create `rwalk.jags.out` (mcmc.list)
# and `rwalk.out` (samples as a matrix), both used below — confirm
source("fit_random_walk.R")
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 1988
##    Unobserved stochastic nodes: 3464
##    Total graph size: 5459
## 
## Initializing model

Diagnostics

# Discard burn-in
# Columns 1:2 hold the two precision parameters (tau_add, tau_obs); the
# latent states x[t] are excluded so the trace plots stay legible
rwalk.params <- window(rwalk.jags.out[,1:2], start = 1000)

# Plot and summarize
plot(rwalk.params)

summary(rwalk.params)
## 
## Iterations = 1000:3000
## Thinning interval = 1 
## Number of chains = 3 
## Sample size per chain = 2001 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##          Mean     SD Naive SE Time-series SE
## tau_add  4.54 0.2086 0.002692       0.008552
## tau_obs 26.24 2.9695 0.038327       0.188643
## 
## 2. Quantiles for each variable:
## 
##          2.5%    25%    50%    75%  97.5%
## tau_add  4.15  4.394  4.532  4.676  4.969
## tau_obs 20.77 24.185 26.127 28.146 32.338
# Posterior correlation between the two precision parameters
cor(as.matrix(rwalk.params))
##            tau_add    tau_obs
## tau_add  1.0000000 -0.5015991
## tau_obs -0.5015991  1.0000000
# Pairwise scatter of the posterior samples
pairs(as.matrix(rwalk.params))

Time Series Plot

## confidence interval
time.rng = c(1, nrow(cleaned_data))  ## you can adjust this line to zoom in and out on specific time intervals

# Get posterior predictions from the random walk model
# NOTE(review): `rwalk.out` is assumed to be the MCMC samples coerced to a
# matrix inside fit_random_walk.R (it is not created in this document) — confirm
x.cols <- grep("^x",colnames(rwalk.out)) ## grab all columns that start with the letter x
# Median and 95% credible interval for every latent state x[t]
ci_rwalk <- apply(rwalk.out[,x.cols], 2, quantile, c(0.025, 0.5, 0.975))

# Plot the time series with confidence intervals
# (empty frame first; the envelope and raw data are layered on below)
plot(cleaned_data$datetime, ci_rwalk[2,], type='n', ylim=range(cleaned_data$chla, na.rm=TRUE),
     xlim=cleaned_data$datetime[time.rng],
     xlab = "Time",
     ylab = "Chlorophyll-a",
     main = "Random Walk Model")

# Adjust x-axis label to be monthly if zoomed
if(diff(time.rng) < 100){ 
  axis.Date(1, at=seq(cleaned_data$datetime[time.rng[1]], cleaned_data$datetime[time.rng[2]], by='month'), format = "%Y-%m")
}

# Add the confidence envelope
ecoforecastR::ciEnvelope(cleaned_data$datetime, ci_rwalk[1,], ci_rwalk[3,], col=ecoforecastR::col.alpha("lightBlue", 0.75))

# Plot the original data points
points(cleaned_data$datetime, cleaned_data$chla, pch="+", cex=0.5)

Previous Year’s Chlorophyll-a Model

Data model: \[ Y_t \sim N(X_t, \tau_\text{obs}) \]

Process model: \[ X_{t} \sim N(X_{t-365}, \tau_\text{add}) \]

Priors: \[ X_1 \sim N(X_{ic}, \tau_{ic}) \] \[ \tau_\text{obs} \sim \text{Gamma}(a_\text{obs}, r_\text{obs}) \] \[ \tau_\text{add} \sim \text{Gamma}(a_\text{add}, r_\text{add}) \]

Fitting the Model

# Fit the seasonal-lag model (X[t] depends on X[t-365]).
# NOTE(review): this script is assumed to create `pyear.jags.out` and
# `pyear.out`, both used below — confirm
source("fit_previous_year_model.R")
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 1988
##    Unobserved stochastic nodes: 3464
##    Total graph size: 5459
## 
## Initializing model

Diagnostics

# Discard burn-in
# Columns 1:2 hold tau_add and tau_obs; latent states are excluded
pyear.params <- window(pyear.jags.out[,1:2], start = 1000)

# Plot and summarize
plot(pyear.params)

summary(pyear.params)
## 
## Iterations = 1000:3000
## Thinning interval = 1 
## Number of chains = 3 
## Sample size per chain = 2001 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##            Mean      SD  Naive SE Time-series SE
## tau_add 10.7405 2.13682 0.0275793      0.2707542
## tau_obs  0.5337 0.02016 0.0002602      0.0007134
## 
## 2. Quantiles for each variable:
## 
##           2.5%    25%     50%    75%   97.5%
## tau_add 6.9381 9.2698 10.6500 12.096 15.0802
## tau_obs 0.4944 0.5202  0.5337  0.547  0.5732
# Posterior correlation between the two precision parameters
cor(as.matrix(pyear.params))
##            tau_add    tau_obs
## tau_add  1.0000000 -0.1540243
## tau_obs -0.1540243  1.0000000
# Pairwise scatter of the posterior samples
pairs(as.matrix(pyear.params))

Time Series Plot

## confidence interval
time.rng = c(1, nrow(cleaned_data))  ## you can adjust this line to zoom in and out on specific time intervals

# Get posterior predictions from the previous-year model
# NOTE(review): `pyear.out` is assumed to be the MCMC samples coerced to a
# matrix inside fit_previous_year_model.R — confirm
x.cols <- grep("^x",colnames(pyear.out)) ## grab all columns that start with the letter x
# Median and 95% credible interval for every latent state x[t]
ci_pyear <- apply(pyear.out[,x.cols], 2, quantile, c(0.025, 0.5, 0.975))

# Plot the time series with confidence intervals
# (empty frame first; the envelope and raw data are layered on below)
plot(cleaned_data$datetime, ci_pyear[2,], type='n', ylim=range(cleaned_data$chla, na.rm=TRUE),
     xlim=cleaned_data$datetime[time.rng],
     xlab = "Time",
     ylab = "Chlorophyll-a",
     main = "Previous Year's Chlorophyll-A Model")

# Adjust x-axis label to be monthly if zoomed
if(diff(time.rng) < 100){ 
  axis.Date(1, at=seq(cleaned_data$datetime[time.rng[1]], cleaned_data$datetime[time.rng[2]], by='month'), format = "%Y-%m")
}

# Add the confidence envelope
ecoforecastR::ciEnvelope(cleaned_data$datetime, ci_pyear[1,], ci_pyear[3,], col=ecoforecastR::col.alpha("lightBlue", 0.75))

# Plot the original data points
points(cleaned_data$datetime, cleaned_data$chla, pch="+", cex=0.5)

Internal Factors Model

Data model: \[ Y_t \sim N(X_t, \tau_\text{obs}) \]

Process model: \[ X_{t+1} \sim N(X_t + \beta_0 + \beta_{DO} Z_{DO, t} + \beta_{pH}Z_{pH, t} + \beta_{\text{turb}}Z_{\text{turb}, t} + \beta_X X_t, \tau_\text{add}) \]

Priors: \[ X_1 \sim N(X_{ic}, \tau_{ic}) \] \[ \tau_\text{obs} \sim \text{Gamma}(a_\text{obs}, r_\text{obs}) \] \[ \tau_\text{add} \sim \text{Gamma}(a_\text{add}, r_\text{add}) \] \[ \beta_j \sim N(0, 0.001) \]

Fitting the Model

# Dynamic linear model with density dependence (the X term) plus the in-lake
# drivers dissolved oxygen, pH, and turbidity; fit_dlm writes and runs the
# JAGS model (printed below) and returns $params and $predict mcmc.lists
internal_model <- ecoforecastR::fit_dlm(model = list(obs = "chla", fixed = "1 + X + oxygen + daily_pH + daily_turbidity"), cleaned_data)
## [1] "  \n  model{\n  \n  #### Priors\n  x[1] ~ dnorm(x_ic,tau_ic)\n  tau_obs ~ dgamma(a_obs,r_obs)\n  tau_add ~ dgamma(a_add,r_add)\n\n  #### Random Effects\n  #RANDOM  tau_alpha~dgamma(0.1,0.1)\n  #RANDOM  for(i in 1:nrep){                  \n  #RANDOM   alpha[i]~dnorm(0,tau_alpha)\n  #RANDOM  }\n\n  #### Fixed Effects\n         betaX ~dnorm(0,0.001)\n      betaIntercept~dnorm(0,0.001)\n     betaoxygen~dnorm(0,0.001)\n     betadaily_pH~dnorm(0,0.001)\n     betadaily_turbidity~dnorm(0,0.001)\n   for(j in 1: 4 ){\n    muXf[j] ~ dnorm(0,0.001)\n    tauXf[j] ~ dgamma(0.01,0.01)\n }\n\n  \n  #### Data Model\n  for(t in 1:n){\n    OBS[t] ~ dnorm(x[t],tau_obs)\n     Xf[t,1] ~ dnorm(muXf[1],tauXf[1])\nXf[t,2] ~ dnorm(muXf[2],tauXf[2])\nXf[t,3] ~ dnorm(muXf[3],tauXf[3])\nXf[t,4] ~ dnorm(muXf[4],tauXf[4])\n  }\n  \n  #### Process Model\n  for(t in 2:n){\n    mu[t] <- x[t-1]  + betaX*x[t-1] + betaIntercept*Xf[t,1] + betaoxygen*Xf[t,2] + betadaily_pH*Xf[t,3] + betadaily_turbidity*Xf[t,4]\n    x[t]~dnorm(mu[t],tau_add)\n  }\n\n  }"
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 10834
##    Unobserved stochastic nodes: 5531
##    Total graph size: 29922
## 
## Initializing model
#names(internal.model)

Parameter Diagnostics

## parameter diagnostics
params_internal <- window(internal_model$params,start=1000) ## remove burn-in
plot(params_internal)

# Posterior correlations among the regression and precision parameters
cor(as.matrix(params_internal))
##                     betaIntercept       betaX betadaily_pH betadaily_turbidity
## betaIntercept         1.000000000 -0.19955100  -0.89879067         0.278366410
## betaX                -0.199551003  1.00000000  -0.05942872        -0.101625484
## betadaily_pH         -0.898790675 -0.05942872   1.00000000        -0.209413021
## betadaily_turbidity   0.278366410 -0.10162548  -0.20941302         1.000000000
## betaoxygen           -0.370530743  0.42769451  -0.06232867        -0.191762212
## tau_add               0.001725704  0.15732799  -0.04135022        -0.007302287
## tau_obs              -0.019776863 -0.17953650   0.08543615         0.023733511
##                      betaoxygen      tau_add     tau_obs
## betaIntercept       -0.37053074  0.001725704 -0.01977686
## betaX                0.42769451  0.157327995 -0.17953650
## betadaily_pH        -0.06232867 -0.041350215  0.08543615
## betadaily_turbidity -0.19176221 -0.007302287  0.02373351
## betaoxygen           1.00000000  0.067046537 -0.12433732
## tau_add              0.06704654  1.000000000 -0.49596573
## tau_obs             -0.12433732 -0.495965726  1.00000000
# Pairwise scatter of the posterior samples
pairs(as.matrix(params_internal))

summary(params_internal)
## 
## Iterations = 1000:5000
## Thinning interval = 1 
## Number of chains = 3 
## Sample size per chain = 4001 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                          Mean        SD  Naive SE Time-series SE
## betaIntercept        0.483313  0.175565 1.602e-03      4.153e-02
## betaX               -0.066675  0.006992 6.382e-05      4.524e-04
## betadaily_pH         0.017243  0.029875 2.727e-04      6.866e-03
## betadaily_turbidity  0.002493  0.002490 2.273e-05      8.308e-05
## betaoxygen          -0.057924  0.009211 8.408e-05      8.474e-04
## tau_add              4.056867  0.165396 1.510e-03      7.428e-03
## tau_obs             73.988710 18.703492 1.707e-01      1.830e+00
## 
## 2. Quantiles for each variable:
## 
##                          2.5%        25%       50%       75%      97.5%
## betaIntercept        0.101678  0.3778720  0.479955  0.598912   0.822937
## betaX               -0.080535 -0.0713667 -0.066709 -0.062018  -0.052879
## betadaily_pH        -0.042270 -0.0016617  0.015671  0.037635   0.081739
## betadaily_turbidity -0.002331  0.0007786  0.002487  0.004172   0.007412
## betaoxygen          -0.075043 -0.0645623 -0.057936 -0.051410  -0.039397
## tau_add              3.751396  3.9403425  4.049350  4.166064   4.398804
## tau_obs             44.961024 60.5967453 71.322518 84.724575 118.396927

Time Series Plot

time.rng = c(1,nrow(cleaned_data))       ## you can adjust this line to zoom in and out on specific time intervals

# Posterior draws of the latent states, summarised as median and 95% CI
out <- as.matrix(internal_model$predict)
ci <- apply(out,2,quantile,c(0.025,0.5,0.975))

# Empty frame first; the envelope and raw observations are layered on below
plot(cleaned_data$datetime,ci[2,],type='n',ylim=range(cleaned_data$chla,na.rm=TRUE),
     #log='y',
     xlim=cleaned_data$datetime[time.rng],
     xlab = "Time",
     ylab = "Chlorophyll-a",
     main = "Internal Factors Model")
## adjust x-axis label to be monthly if zoomed
if(diff(time.rng) < 100){ 
  axis.Date(1, at=seq(cleaned_data$datetime[time.rng[1]],cleaned_data$datetime[time.rng[2]],by='month'), format = "%Y-%m")
}
ecoforecastR::ciEnvelope(cleaned_data$datetime,ci[1,],ci[3,],col=ecoforecastR::col.alpha("lightBlue",0.75))
points(cleaned_data$datetime, cleaned_data$chla,pch="+",cex=0.5)

# NOTE(review): this repeats the correlation table already printed in the
# parameter-diagnostics section above — consider removing the duplicate
cor(as.matrix(params_internal))
##                     betaIntercept       betaX betadaily_pH betadaily_turbidity
## betaIntercept         1.000000000 -0.19955100  -0.89879067         0.278366410
## betaX                -0.199551003  1.00000000  -0.05942872        -0.101625484
## betadaily_pH         -0.898790675 -0.05942872   1.00000000        -0.209413021
## betadaily_turbidity   0.278366410 -0.10162548  -0.20941302         1.000000000
## betaoxygen           -0.370530743  0.42769451  -0.06232867        -0.191762212
## tau_add               0.001725704  0.15732799  -0.04135022        -0.007302287
## tau_obs              -0.019776863 -0.17953650   0.08543615         0.023733511
##                      betaoxygen      tau_add     tau_obs
## betaIntercept       -0.37053074  0.001725704 -0.01977686
## betaX                0.42769451  0.157327995 -0.17953650
## betadaily_pH        -0.06232867 -0.041350215  0.08543615
## betadaily_turbidity -0.19176221 -0.007302287  0.02373351
## betaoxygen           1.00000000  0.067046537 -0.12433732
## tau_add              0.06704654  1.000000000 -0.49596573
## tau_obs             -0.12433732 -0.495965726  1.00000000

External Factors Model

Data model: \[ Y_t \sim N(X_t, \tau_\text{obs}) \]

Process model: \[ X_{t+1} \sim N(X_t + \beta_0 + \beta_{\text{temp}} Z_{\text{temp}, t} + \beta_{lr}Z_{lr, t} + \beta_{sr}Z_{sr, t} + \beta_{\text{prec}} Z_{\text{prec}, t}, \tau_\text{add}) \]

Priors: \[ X_1 \sim N(X_{ic}, \tau_{ic}) \] \[ \tau_\text{obs} \sim \text{Gamma}(a_\text{obs}, r_\text{obs}) \] \[ \tau_\text{add} \sim \text{Gamma}(a_\text{add}, r_\text{add}) \] \[ \beta_j \sim N(0, 0.001) \]

Fitting the Model

# Assemble the inputs for the JAGS fit. Observations go in `y`; the remaining
# elements are the meteorological drivers plus the prior hyper-parameters
# that ecoforecastR::fit_dlm expects. Missing values are handled by fit_dlm.
data_ext <- list(
  y        = cleaned_data$chla,
  n        = length(cleaned_data$chla),
  temp     = cleaned_data$temperature,
  longrad  = cleaned_data$surface_downwelling_longwave_flux_in_air,
  shortrad = cleaned_data$surface_downwelling_shortwave_flux_in_air,
  precip   = cleaned_data$precipitation_flux,
  x_ic = 1, tau_ic = 100,   # initial-condition prior
  a_obs = 1, r_obs = 1,     # observation-error prior
  a_add = 1, r_add = 1      # process-error prior
)

# Fixed-effects formula: external drivers only, no explicit X (density) term
model_ext <- "~ 1 + temp + longrad + shortrad + precip"

# Fit the dynamic linear model; returns $params and $predict mcmc.lists
ef.out.external <- ecoforecastR::fit_dlm(
  model = list(obs = "y", fixed = model_ext),
  data = data_ext
)
## [1] "  \n  model{\n  \n  #### Priors\n  x[1] ~ dnorm(x_ic,tau_ic)\n  tau_obs ~ dgamma(a_obs,r_obs)\n  tau_add ~ dgamma(a_add,r_add)\n\n  #### Random Effects\n  #RANDOM  tau_alpha~dgamma(0.1,0.1)\n  #RANDOM  for(i in 1:nrep){                  \n  #RANDOM   alpha[i]~dnorm(0,tau_alpha)\n  #RANDOM  }\n\n  #### Fixed Effects\n        betaIntercept~dnorm(0,0.001)\n     betatemp~dnorm(0,0.001)\n     betalongrad~dnorm(0,0.001)\n     betashortrad~dnorm(0,0.001)\n     betaprecip~dnorm(0,0.001)\n   for(j in 1: 5 ){\n    muXf[j] ~ dnorm(0,0.001)\n    tauXf[j] ~ dgamma(0.01,0.01)\n }\n\n  \n  #### Data Model\n  for(t in 1:n){\n    OBS[t] ~ dnorm(x[t],tau_obs)\n     Xf[t,1] ~ dnorm(muXf[1],tauXf[1])\nXf[t,2] ~ dnorm(muXf[2],tauXf[2])\nXf[t,3] ~ dnorm(muXf[3],tauXf[3])\nXf[t,4] ~ dnorm(muXf[4],tauXf[4])\nXf[t,5] ~ dnorm(muXf[5],tauXf[5])\n  }\n  \n  #### Process Model\n  for(t in 2:n){\n    mu[t] <- x[t-1]  + betaIntercept*Xf[t,1] + betatemp*Xf[t,2] + betalongrad*Xf[t,3] + betashortrad*Xf[t,4] + betaprecip*Xf[t,5]\n    x[t]~dnorm(mu[t],tau_add)\n  }\n\n  }"
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 12067
##    Unobserved stochastic nodes: 7025
##    Total graph size: 32336
## 
## Initializing model
# Optional: save results
# save(ef.out.external, file = "external_factors.RData")

Diagnostics

# Discard burn-in
# Trace plots and posterior summaries for the regression and precision terms
params_external <- window(ef.out.external$params, start = 1000)

# Plot and summarize
plot(params_external)

summary(params_external)
## 
## Iterations = 1000:5000
## Thinning interval = 1 
## Number of chains = 3 
## Sample size per chain = 4001 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                     Mean        SD  Naive SE Time-series SE
## betaIntercept  0.0370840 1.343e-01 1.226e-03      2.225e-02
## betalongrad   -0.0002788 5.276e-04 4.816e-06      1.236e-04
## betaprecip     0.5311987 3.955e-01 3.610e-03      2.665e-02
## betashortrad  -0.0001340 2.864e-04 2.614e-06      2.657e-05
## betatemp       0.0031493 4.576e-03 4.176e-05      7.134e-04
## tau_add        4.0559096 1.831e-01 1.672e-03      9.815e-03
## tau_obs       60.3528694 1.537e+01 1.403e-01      1.444e+00
## 
## 2. Quantiles for each variable:
## 
##                     2.5%        25%        50%       75%     97.5%
## betaIntercept -0.2361274 -0.0548322  0.0411104 1.386e-01 2.759e-01
## betalongrad   -0.0011841 -0.0006934 -0.0003027 8.414e-05 8.029e-04
## betaprecip    -0.2681030  0.2655871  0.5382155 7.998e-01 1.293e+00
## betashortrad  -0.0006847 -0.0003318 -0.0001386 5.943e-05 4.342e-04
## betatemp      -0.0064288  0.0002047  0.0032261 6.507e-03 1.119e-02
## tau_add        3.7229731  3.9289469  4.0475000 4.174e+00 4.441e+00
## tau_obs       36.5178659 49.1561949 57.9021651 6.925e+01 9.758e+01
# Posterior correlations among the regression and precision parameters
cor(as.matrix(params_external))
##               betaIntercept betalongrad  betaprecip betashortrad    betatemp
## betaIntercept    1.00000000 -0.91190463  0.39774455   -0.1567416  0.41539852
## betalongrad     -0.91190463  1.00000000 -0.35514670    0.1492671 -0.66579612
## betaprecip       0.39774455 -0.35514670  1.00000000    0.3496880 -0.15626735
## betashortrad    -0.15674161  0.14926707  0.34968803    1.0000000 -0.63762859
## betatemp         0.41539852 -0.66579612 -0.15626735   -0.6376286  1.00000000
## tau_add          0.06597076 -0.03961318  0.09959162    0.1020362 -0.07473018
## tau_obs          0.01470687 -0.08129367 -0.04503806   -0.1697564  0.21550078
##                   tau_add     tau_obs
## betaIntercept  0.06597076  0.01470687
## betalongrad   -0.03961318 -0.08129367
## betaprecip     0.09959162 -0.04503806
## betashortrad   0.10203622 -0.16975642
## betatemp      -0.07473018  0.21550078
## tau_add        1.00000000 -0.56749727
## tau_obs       -0.56749727  1.00000000
# Pairwise scatter of the posterior samples
pairs(as.matrix(params_external))

Time Series Plot

## Credible-interval time-series plot for the external-factors model
time.rng <- c(1, nrow(cleaned_data))  ## adjust to zoom in on a sub-interval

# Posterior draws of the latent states, summarised as median and 95% CI
out_ext <- as.matrix(ef.out.external$predict)
ci_ext <- apply(out_ext, 2, quantile, c(0.025, 0.5, 0.975))

# Empty plotting frame sized to the data; envelope and points layered after
plot(cleaned_data$datetime, ci_ext[2,], type='n',
     ylim=range(cleaned_data$chla, na.rm=TRUE),
     xlim=cleaned_data$datetime[time.rng],
     xlab = "Time",
     ylab = "Chlorophyll-a",
     main = "External Factors Model")

# Switch to monthly tick labels when the window is short enough to read them
if(diff(time.rng) < 100){
  axis.Date(1,
            at=seq(cleaned_data$datetime[time.rng[1]],
                   cleaned_data$datetime[time.rng[2]], by='month'),
            format = "%Y-%m")
}

# 95% credible envelope, then the raw observations on top
ecoforecastR::ciEnvelope(cleaned_data$datetime, ci_ext[1,], ci_ext[3,],
                         col=ecoforecastR::col.alpha("lightBlue", 0.75))
points(cleaned_data$datetime, cleaned_data$chla, pch="+", cex=0.5)

Combined Factors Model

Data model: \[ Y_t \sim N(X_t, \tau_\text{obs}) \]

Process model: \[ X_{t+1} \sim N(X_t + \beta_0 + \beta_{\text{temp}} Z_{\text{temp}, t} + \beta_{\text{prec}} Z_{\text{prec}, t} + \beta_X X_t, \tau_\text{add}) \]

Priors: \[ X_1 \sim N(X_{ic}, \tau_{ic}) \] \[ \tau_\text{obs} \sim \text{Gamma}(a_\text{obs}, r_\text{obs}) \] \[ \tau_\text{add} \sim \text{Gamma}(a_\text{add}, r_\text{add}) \] \[ \beta_j \sim N(0, 0.001) \]

Fitting the Model

# set up data object for ecoforecastR
# NOTE(review): the name `data` shadows base::data(); harmless here but a
# more specific name would be clearer. longrad/shortrad are included in the
# list but are not used by the `model2` formula below.
data <- list(y = cleaned_data$chla,
             n = length(cleaned_data$chla),      ## data
             temp = cleaned_data$temperature,
             longrad = cleaned_data$surface_downwelling_longwave_flux_in_air,
             shortrad = cleaned_data$surface_downwelling_shortwave_flux_in_air,
             precip = cleaned_data$precipitation_flux,
             x_ic=1,tau_ic=100,      ## initial condition prior
             a_obs=1,r_obs=1,           ## obs error prior
             a_add=1,r_add=1            ## process error prior
)

## fit the model
# Fixed effects: density dependence (X) plus temperature and precipitation
model2 <- "~ 1 + X + temp + precip"
ef.out.combined <- ecoforecastR::fit_dlm(model=list(obs="y",fixed=model2),data)
## [1] "  \n  model{\n  \n  #### Priors\n  x[1] ~ dnorm(x_ic,tau_ic)\n  tau_obs ~ dgamma(a_obs,r_obs)\n  tau_add ~ dgamma(a_add,r_add)\n\n  #### Random Effects\n  #RANDOM  tau_alpha~dgamma(0.1,0.1)\n  #RANDOM  for(i in 1:nrep){                  \n  #RANDOM   alpha[i]~dnorm(0,tau_alpha)\n  #RANDOM  }\n\n  #### Fixed Effects\n         betaX ~dnorm(0,0.001)\n      betaIntercept~dnorm(0,0.001)\n     betatemp~dnorm(0,0.001)\n     betaprecip~dnorm(0,0.001)\n   for(j in 1: 3 ){\n    muXf[j] ~ dnorm(0,0.001)\n    tauXf[j] ~ dgamma(0.01,0.01)\n }\n\n  \n  #### Data Model\n  for(t in 1:n){\n    OBS[t] ~ dnorm(x[t],tau_obs)\n     Xf[t,1] ~ dnorm(muXf[1],tauXf[1])\nXf[t,2] ~ dnorm(muXf[2],tauXf[2])\nXf[t,3] ~ dnorm(muXf[3],tauXf[3])\n  }\n  \n  #### Process Model\n  for(t in 2:n){\n    mu[t] <- x[t-1]  + betaX*x[t-1] + betaIntercept*Xf[t,1] + betatemp*Xf[t,2] + betaprecip*Xf[t,3]\n    x[t]~dnorm(mu[t],tau_add)\n  }\n\n  }"
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 8757
##    Unobserved stochastic nodes: 4880
##    Total graph size: 24160
## 
## Initializing model
#save(ef.out.combined, file = "combined_factors.RData")

Parameter Diagnostics

#load("combined_factors.RData")

## parameter diagnostics
# Trace plots and posterior summaries for the regression and precision terms
params <- window(ef.out.combined$params,start=1000) ## remove burn-in
plot(params)

summary(params)
## 
## Iterations = 1000:5000
## Thinning interval = 1 
## Number of chains = 3 
## Sample size per chain = 4001 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                    Mean        SD  Naive SE Time-series SE
## betaIntercept -0.092966  0.055985 5.110e-04      0.0041711
## betaX         -0.059952  0.006373 5.817e-05      0.0002518
## betaprecip     0.644734  0.324538 2.962e-03      0.0100431
## betatemp       0.008548  0.002488 2.271e-05      0.0002043
## tau_add        4.068364  0.171405 1.565e-03      0.0082590
## tau_obs       71.129591 16.696464 1.524e-01      1.5827871
## 
## 2. Quantiles for each variable:
## 
##                    2.5%       25%       50%      75%     97.5%
## betaIntercept -0.207805 -0.130492 -0.092385 -0.05419   0.01344
## betaX         -0.072554 -0.064103 -0.059849 -0.05566  -0.04757
## betaprecip     0.008938  0.423727  0.648828  0.86946   1.26703
## betatemp       0.003834  0.006805  0.008523  0.01015   0.01372
## tau_add        3.753290  3.949714  4.058013  4.17895   4.42633
## tau_obs       41.595512 59.779260 69.879392 80.79990 108.51715
# Posterior correlations among the regression and precision parameters
cor(as.matrix(params))
##               betaIntercept      betaX  betaprecip    betatemp     tau_add
## betaIntercept    1.00000000  0.1096183  0.19624558 -0.95014130 -0.01865980
## betaX            0.10961831  1.0000000 -0.11060735 -0.32160779  0.12718455
## betaprecip       0.19624558 -0.1106073  1.00000000 -0.30448948  0.04622121
## betatemp        -0.95014130 -0.3216078 -0.30448948  1.00000000 -0.02105147
## tau_add         -0.01865980  0.1271845  0.04622121 -0.02105147  1.00000000
## tau_obs         -0.05749788 -0.1469492 -0.03542011  0.09352818 -0.51516900
##                   tau_obs
## betaIntercept -0.05749788
## betaX         -0.14694924
## betaprecip    -0.03542011
## betatemp       0.09352818
## tau_add       -0.51516900
## tau_obs        1.00000000
# Pairwise scatter of the posterior samples
pairs(as.matrix(params))

Time-Series Plot

## confidence interval
time.rng = c(1,nrow(cleaned_data))       ## you can adjust this line to zoom in and out on specific time intervals

# Posterior draws of the latent states, summarised as median and 95% CI
out <- as.matrix(ef.out.combined$predict)
ci <- apply(out,2,quantile,c(0.025,0.5,0.975))

# Empty frame first; the envelope and raw observations are layered on below
plot(cleaned_data$datetime,ci[2,],type='n',ylim=range(cleaned_data$chla,na.rm=TRUE),
     #log='y',
     xlim=cleaned_data$datetime[time.rng],
     xlab = "Time",
     ylab = "Chlorophyll-a",
     main = "Combined Factors Model")
## adjust x-axis label to be monthly if zoomed
if(diff(time.rng) < 100){ 
  axis.Date(1, at=seq(cleaned_data$datetime[time.rng[1]],cleaned_data$datetime[time.rng[2]],by='month'), format = "%Y-%m")
}
ecoforecastR::ciEnvelope(cleaned_data$datetime,ci[1,],ci[3,],col=ecoforecastR::col.alpha("lightBlue",0.75))
points(cleaned_data$datetime, cleaned_data$chla,pch="+",cex=0.5)